This is one of the models that were built during the experimentation stage. It was not used in the final application because of problems with saving the model and loading it into the backend of the web application.
This algorithm uses a ResNet18 neural network and is a modified version of a LeNet skin cancer detection algorithm originally written by Soham Mazumder.
This skin cancer detection algorithm uses the HAM10000 ("Human Against Machine with 10000 training images") dataset, which contains 10,015 dermatoscopic images.
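The 7 classes of skin lesions included in this dataset are: actinic keratoses (akiec), basal cell carcinoma (bcc), benign keratosis-like lesions (bkl), dermatofibroma (df), melanoma (mel), melanocytic nevi (nv) and vascular lesions (vasc).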
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import os
import shutil
import pandas as pd
import seaborn as sns
from glob import glob
import imageio
from PIL import Image
import glob
import math
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
from sklearn.preprocessing import LabelEncoder, StandardScaler
import skimage
import scipy.ndimage
from scipy import misc
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
# Select the compute device: the GPU when CUDA is available, the CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Bare expression so the notebook cell displays the selected device.
device
device(type='cpu')
# Load the HAM10000 metadata table from the dataset folder and print its
# (rows, columns) shape as a quick sanity check.
data_dir = "./data/HAM10000"
metadata = pd.read_csv(f"{data_dir}/HAM10000_metadata.csv")
print(metadata.shape)
# Label encoding of the seven skin lesion classes: fit the encoder on the
# class codes found in the `dx` column. The fitted encoder is reused below to
# turn those codes into integer labels.
le = LabelEncoder()
le.fit(metadata['dx'])
LabelEncoder()
# Store the integer-encoded class of every row in a new `label` column, list
# the class codes in the encoder's order, then preview ten random rows.
metadata['label'] = le.transform(metadata["dx"])
print("Classes:", list(le.classes_))
metadata.sample(10)
(10015, 8) Classes: ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']
| | lesion_id | image_id | dx | dx_type | age | sex | localization | dataset | label |
|---|---|---|---|---|---|---|---|---|---|
| 6645 | HAM_0005544 | ISIC_0027791 | nv | follow_up | 55.0 | male | lower extremity | vidir_molemax | 5 |
| 4868 | HAM_0004057 | ISIC_0026635 | nv | follow_up | 65.0 | male | trunk | vidir_molemax | 5 |
| 9658 | HAM_0001956 | ISIC_0030038 | nv | consensus | 30.0 | female | back | vidir_modern | 5 |
| 1737 | HAM_0006802 | ISIC_0032610 | mel | histo | 40.0 | male | upper extremity | vidir_modern | 4 |
| 1122 | HAM_0004317 | ISIC_0029039 | df | histo | 50.0 | female | lower extremity | rosendahl | 3 |
| 544 | HAM_0002722 | ISIC_0031289 | bkl | histo | 55.0 | male | back | rosendahl | 2 |
| 5858 | HAM_0003297 | ISIC_0024373 | nv | follow_up | 60.0 | male | abdomen | vidir_molemax | 5 |
| 3891 | HAM_0002165 | ISIC_0031172 | nv | follow_up | 55.0 | male | lower extremity | vidir_molemax | 5 |
| 446 | HAM_0005558 | ISIC_0030812 | bkl | histo | 85.0 | male | face | rosendahl | 2 |
| 6622 | HAM_0001970 | ISIC_0031136 | nv | follow_up | 50.0 | male | trunk | vidir_molemax | 5 |
# Overview figure (2 x 2 grid) of the metadata: counts per lesion class, per
# sex and per body localization, plus the age distribution.
fig = plt.figure(figsize=(40, 25))

# Top-left: number of images per lesion class code.
ax1 = fig.add_subplot(221)
metadata['dx'].value_counts().plot(kind='bar', ax=ax1)
ax1.set_ylabel('Count', size=50)
ax1.set_title('Cell Type', size=50)

# Top-right: number of images per sex.
ax2 = fig.add_subplot(222)
metadata['sex'].value_counts().plot(kind='bar', ax=ax2)
ax2.set_ylabel('Count', size=50)
ax2.set_title('Sex', size=50)

# Bottom-left: number of images per body localization. The target axes are
# passed explicitly, like the other panels, instead of relying on the
# "current axes" happening to be ax3.
ax3 = fig.add_subplot(223)
metadata['localization'].value_counts().plot(kind='bar', ax=ax3)
ax3.set_ylabel('Count', size=50)
ax3.set_title('Localization', size=50)

# Bottom-right: age distribution, using only the rows where age is known.
ax4 = fig.add_subplot(224)
sample_age = metadata[pd.notnull(metadata['age'])]
# `sns.distplot` is deprecated and scheduled for removal; `histplot` is its
# axes-level replacement. stat='density' keeps the histogram on the same
# scale as the density curves drawn on top of it.
sns.histplot(sample_age['age'], stat='density', kde=True, color='red', ax=ax4)
# `histplot` has no `fit=` argument, so the fitted normal curve that
# `distplot(fit=stats.norm)` used to draw is added explicitly.
age_mu, age_sigma = stats.norm.fit(sample_age['age'])
age_grid = np.linspace(sample_age['age'].min(), sample_age['age'].max(), 200)
ax4.plot(age_grid, stats.norm.pdf(age_grid, age_mu, age_sigma), color='black')
ax4.set_title('Age', size=50)
ax4.set_xlabel('Year', size=50)

plt.tight_layout()
plt.show()
C:\Users\marty\anaconda3\envs\newEnv1\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
The commented-out code below takes the HAM10000 dataset in its original form (data/HAM10000) and sorts all the images into one folder per type of skin lesion (data/HAM10K). It is commented out because this has already been done and does not need to be run again.
# Absolute locations, all rooted at the current working directory:
#   data_dir  - the original HAM10000 folder (metadata CSV and images)
#   image_dir - the folder which has all the images
#   dest_dir  - the folder where the rearranged images are stored
data_dir = f"{os.getcwd()}/data/HAM10000/"
image_dir = f"{data_dir}Images/"
dest_dir = f"{os.getcwd()}/data/HAM10K"

# Read the metadata file. data_dir already ends with "/", so the resulting
# path contains a doubled separator.
# NOTE(review): this rebinds `metadata` to a freshly read frame, so the
# `label` column added to the earlier frame is not present on this one.
metadata = pd.read_csv(f"{data_dir}/HAM10000_metadata.csv")

# Class codes and scratch list used by the one-off copy step below.
label = ['bkl', 'nv', 'df', 'mel', 'vasc', 'bcc', 'akiec']
label_images = []

# One-off rearrangement of the images into one sub-folder per class code.
# Kept commented out on purpose.
# os.mkdir(dest_dir)
# for i in label:
#     os.mkdir(dest_dir + "/" + str(i) + "/")
#
# # Copy the images into new folder structure:
# for i in label:
#     sample = metadata[metadata['dx'] == i]['image_id']
#     label_images.extend(sample)
#     for id in label_images:
#         shutil.copyfile((image_dir + id +".jpg"), (dest_dir + "/" + i + "/"+id+".jpg"))
#     label_images=[]
# Show a 7 x 5 grid of example images: one row per class, five images each.
# `label` holds the class codes and `classes` the display names, index for
# index in the same order.
label = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']
classes = [
    'actinic keratoses', 'basal cell carcinoma', 'benign keratosis-like lesions',
    'dermatofibroma', 'melanoma', 'melanocytic nevi', 'vascular lesions',
]
k = range(7)  # not used anywhere in this section

# Collect the first five image ids of every class, class by class.
label_images = []
for dx_code in label:
    in_class = metadata['dx'] == dx_code
    label_images.extend(metadata.loc[in_class, 'image_id'].iloc[:5])

fig = plt.figure(figsize=(55, 55))
for slot, image_id in enumerate(label_images, start=1):
    # Look the class code up again to locate the image inside dest_dir.
    image_dx = metadata.loc[metadata['image_id'] == image_id, 'dx'].values[0]
    picture = imageio.imread(f"{dest_dir}/{image_dx}/{image_id}.jpg")

    plt.subplot(7, 5, slot)
    plt.imshow(picture)
    plt.axis('off')

    # The first image of each row carries that row's class name as its title.
    row, column = divmod(slot - 1, 5)
    if column == 0:
        plt.title(classes[row], loc='left', size=50, weight="bold")

plt.tight_layout()
plt.show()